*This file computes the mean of each outcome variable, by group, for inclusion in the regression tables.

********************************************************************************
*DEFINE DIRECTORIES
*home is the root folder; main and its subfolders are built from it
local home    "CHILD"
local main    "`home'/JPE"
local logs    "`main'/logs"
local data    "`main'/data"
local results "`main'/results"
local network "NETWORK"
********************************************************************************

*open a dated text log in the results folder
*any log that is still open is closed first; the error is ignored if none is open
capture log close
cd "`results'"
*date stamp is the current date with its blanks removed
local time = subinstr(c(current_date), " ", "", .)
log using "outcome_means_table_by_group_`time'.txt", text replace

*load the regression sample from the main folder
cd "`main'"
use regression_data_final_jpe.dta, clear

*copy the treatment variables out of the summary statistics file into a tempfile
*the data in memory are set aside while this is done and brought back afterwards
preserve
use all_summary_statistics.dta, clear
keep patientid treated-badfda nd adhd
tempfile temp
save `temp', replace
restore

*remove these three variables from the sample before merging
*NOTE(review): presumably so the versions from the summary statistics file are used - confirm
drop treat drug bad

*attach the single treatment variables; only patients found in both files are kept
merge 1:1 patientid using `temp', keep(match) nogenerate

*drop variables that are not used below
drop tca benzo badfda

*drug treatment outcome: 1 when drug_only plus both is greater than zero, else 0
*NOTE(review): a missing sum compares as greater than zero in Stata, so such rows
*would be coded 1 - confirm drug_only and both are never missing
generate drugs = (drug_only + both) > 0
drop drug_only therapy_only both

*merge in new bad prescribing variables
*keep(master match) retains every patient already in the sample and discards
*patients that appear only in the prescribing file, so the sample cannot grow
merge 1:1 patientid using bad_prescribing2.dta, keep(master match) nogenerate

*patients without a prescribing record get 0 on every flag from fda_ok to red_flag
foreach flag of varlist fda_ok-red_flag {
	replace `flag' = 0 if `flag' == .
}

**incorporate ND diagnosis
*the existing adhd and nd variables are dropped before the merge
drop adhd nd

*place the treatment and prescribing indicators right after the patient id
order treated drugs red_flag grey_area fda_ok, after(patientid)

*the neurodevelopmental file is read from the home folder; matched patients only
cd "`home'"
merge 1:1 patientid using neurodevelopmental.dta, keep(match) nogenerate

*add hurting yourself outcome
*keep(master match) retains every patient already in the sample and discards
*patients that appear only in the hurting yourself file
cd "`home'"
merge 1:1 patientid using hurting_yourself.dta, keep(master match) nogenerate

*recode kids without hurting yourself records to 0, and cap counts above 1 at 1,
*for every variable from before to second_year
foreach v of varlist before-second_year {
	replace `v' = 0 if `v' == .
	replace `v' = 1 if `v' > 1
}

*merge in REVISED DIAGNOSIS INFO from the home folder
*only patients present in both the sample and the diagnosis file are kept
cd "`home'"
merge 1:1 patientid using diagnosis_revised_final.dta, keep(match) nogenerate

*temp holds the analysis sample; temp2 accumulates the rows of the output table
tempfile temp temp2
save `temp', replace

*start the output table as an empty dataset with a single string variable
clear
generate str1 group = ""
save `temp2', replace


*table block 1: outcome means for the full sample
use `temp', clear

*generate above/below median cost variable
*1 if the cost is at or above the sample median, 0 if below; a missing cost stays
*missing, because a missing value would otherwise compare as above the median
foreach cost in tot3m tot12m tot24m {
	quietly summarize `cost'_orig, detail
	generate above_`cost' = `cost'_orig >= r(p50) if !missing(`cost'_orig)
}

*define outcomes
local outcomes tot3m tot12m tot24m mh3m mh12m mh24m above_tot3m above_tot12m above_tot24m hosp3m hosp12m hosp24m  timein3m timein12m timein24m first_three first_year second_year 

*one row labelled All holding the mean of every outcome and the observation count
generate group = "All"
generate share = 1
collapse (mean) `outcomes' (sum) share, by(group)

*turn the count into a share of the group total
bysort group: egen total = total(share)
replace share = share/total
drop total

*id records the block position in the final table; add the block to the accumulator
generate id = 1
append using `temp2'
save `temp2', replace

*table block 2: outcome means split by treated status
use `temp', clear

*generate above/below median cost variable
*1 if the cost is at or above the sample median, 0 if below; a missing cost stays
*missing, because a missing value would otherwise compare as above the median
foreach cost in tot3m tot12m tot24m {
	quietly summarize `cost'_orig, detail
	generate above_`cost' = `cost'_orig >= r(p50) if !missing(`cost'_orig)
}

*define outcomes
local outcomes tot3m tot12m tot24m mh3m mh12m mh24m above_tot3m above_tot12m above_tot24m hosp3m hosp12m hosp24m  timein3m timein12m timein24m first_three first_year second_year 

*label each patient by treated status
*NOTE(review): a missing value of treated counts as true here - confirm treated is never missing
generate group = "Treated" if treated
replace group = "!Treated" if !treated

*one row per label holding the outcome means and the observation count
generate share = 1
collapse (mean) `outcomes' (sum) share, by(group)

*turn the counts into shares of all observations in this block
egen total = total(share)
replace share = share/total
drop total

*id records the block position in the final table; add the block to the accumulator
generate id = 2
append using `temp2'
save `temp2', replace

*table block 3: outcome means split by drug treatment, among treated patients
use `temp', clear
keep if treated

*generate above/below median cost variable
*the median is taken over the treated subsample, since the restriction comes first
*1 if the cost is at or above that median, 0 if below; a missing cost stays
*missing, because a missing value would otherwise compare as above the median
foreach cost in tot3m tot12m tot24m {
	quietly summarize `cost'_orig, detail
	generate above_`cost' = `cost'_orig >= r(p50) if !missing(`cost'_orig)
}

*define outcomes
local outcomes tot3m tot12m tot24m mh3m mh12m mh24m above_tot3m above_tot12m above_tot24m hosp3m hosp12m hosp24m  timein3m timein12m timein24m first_three first_year second_year 

*label each treated patient by whether drugs is nonzero
generate group = "Drugs | Treated" if drugs
replace group = "!Drugs | Treated" if !drugs

*one row per label holding the outcome means and the observation count
generate share = 1
collapse (mean) `outcomes' (sum) share, by(group)

*turn the counts into shares of all observations in this block
egen total = total(share)
replace share = share/total
drop total

*id records the block position in the final table; add the block to the accumulator
generate id = 3
append using `temp2'
save `temp2', replace

*table block 4: outcome means split by prescribing category, among patients on drugs
use `temp', clear
keep if drugs

*generate above/below median cost variable
*the median is taken over the drugs subsample, since the restriction comes first
*1 if the cost is at or above that median, 0 if below; a missing cost stays
*missing, because a missing value would otherwise compare as above the median
foreach cost in tot3m tot12m tot24m {
	quietly summarize `cost'_orig, detail
	generate above_`cost' = `cost'_orig >= r(p50) if !missing(`cost'_orig)
}

*define outcomes
local outcomes tot3m tot12m tot24m mh3m mh12m mh24m above_tot3m above_tot12m above_tot24m hosp3m hosp12m hosp24m  timein3m timein12m timein24m first_three first_year second_year 

*label each patient by prescribing category
*when more than one flag is set the last replace wins, so fda_ok takes priority
*over grey_area, which takes priority over red_flag
*NOTE(review): patients with no flag set keep the empty label and presumably
*form a row of their own after the collapse - confirm this is intended
generate group = ""
replace group = "Red Flag | Drugs" if red_flag
replace group = "Grey Area | Drugs" if grey_area
replace group = "FDA-OK | Drugs" if fda_ok

*one row per label holding the outcome means and the observation count
generate share = 1
collapse (mean) `outcomes' (sum) share, by(group)

*turn the counts into shares of all observations in this block
egen total = total(share)
replace share = share/total
drop total

*id records the block position in the final table; add the block to the accumulator
generate id = 4
append using `temp2'
save `temp2', replace

*arrange the table by block and, within a block, by group label
*sorting on id alone leaves tied rows in random order, so group is added as a
*second key to make the exported row order reproducible
sort id group
drop id
order share, after(group)

*write the table to the results folder with variable names in the first row
cd "`results'"
export excel using "outcome_means_table_by_group.xlsx", firstrow(variables) replace

*close the log opened at the top of the script
capture log close